import treetaggerwrapper
import os
import csv 
import pickle

# CSV inputs and pickle outputs; bare names, so they resolve against the
# current working directory.
inputTrain = 'train.csv'
inputTest  = 'test.csv'
trainFileName = "trainLematizedObj.txt"
testFileName  = "testLematizedObj.txt"

# Parent of the current working directory, handed to TreeTagger as TAGDIR.
path = "/".join((os.path.abspath("."), ".."))
# Single English tagger instance, created once when the module is loaded.
tagger = treetaggerwrapper.TreeTagger(TAGLANG='en', TAGDIR=path)

def tagALine(line):
    """Lemmatize one piece of text with the module-level ``tagger``.

    Every item returned by ``tagger.TagText`` is split on whitespace. The
    third field is used (the lemma, in TreeTagger's word/POS/lemma output
    layout) unless it is ``<unknown>``; in that case the first field, i.e.
    the token as it appeared in the text, is kept instead.

    Items with fewer than three fields fall back to their first field rather
    than raising ``IndexError``; items that are empty or whitespace-only are
    skipped.

    Args:
        line: Text to tag.

    Returns:
        The selected values, each followed by one space. A non-empty result
        therefore ends with a trailing space; no tags yields ``""``.
    """
    pieces = []
    for item in tagger.TagText(line):
        fields = item.split()
        if not fields:
            # Nothing usable in this item; indexing it would fail.
            continue
        if len(fields) > 2 and fields[2] != "<unknown>":
            value = fields[2]
        else:
            # Unknown lemma or a short item: keep the first field as-is.
            value = fields[0]
        pieces.append(value + " ")
    return "".join(pieces)


def lematized(inFileName, outFileName, flagTrain):
    """Lemmatize the sentences of a CSV file and pickle them as a dict.

    For each row, three consecutive columns are read: ``docID``, ``senID``
    and the sentence text, at 0-based indices 1, 2 and 3. When
    ``flagTrain == 1`` all three indices are shifted right by one. The
    sentence is stripped, passed through ``tagALine`` and stored under the
    key ``"<docID>_<senID>"``; a repeated key overwrites the earlier entry.

    Args:
        inFileName: Path of the comma-delimited input file.
        outFileName: Path of the file that receives the pickled dict.
        flagTrain: ``1`` to apply the one-column shift, anything else for
            no shift.

    Raises:
        IndexError: If a non-empty row has fewer columns than required.
    """
    adjust = 1 if flagTrain == 1 else 0
    resultMap = {}

    # ``with`` guarantees the handle is released even if tagging fails.
    with open(inFileName) as infile:
        for row in csv.reader(infile, delimiter=","):
            if not row:
                # csv.reader yields [] for blank lines; nothing to tag.
                continue
            docID = row[adjust + 1]
            senID = row[adjust + 2]
            sent = row[adjust + 3].strip()
            key = str(docID) + "_" + str(senID)
            resultMap[key] = tagALine(sent)

    # pickle produces bytes, so the target must be opened in binary mode.
    with open(outFileName, 'wb') as outFile:
        pickle.dump(resultMap, outFile)

# Process the training file first (flag 1), then the test file (flag 0).
for csvName, pickleName, isTrain in (
    (inputTrain, trainFileName, 1),
    (inputTest, testFileName, 0),
):
    lematized(csvName, pickleName, isTrain)
